package com.shujia.spark.sql

import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}

object Demo8Submit {

  /**
   * Word-count batch job meant to be launched via spark-submit.
   *
   * Reads lines from /data/words, splits each line on commas, counts the
   * occurrences of every word, and writes the result as tab-separated CSV
   * to /data/sql_wordcount (overwriting any previous run's output).
   */
  def main(args: Array[String]): Unit = {
    // Master is deliberately NOT set here so it can be chosen at deploy
    // time through spark-submit's --master flag.
    val spark: SparkSession = SparkSession
      .builder()
      //.master("local")
      .appName("submit")
      .getOrCreate()
    import spark.implicits._
    import org.apache.spark.sql.functions._

    try {
      // Read the input as a single pipe-separated CSV column named "line".
      val linesDF: DataFrame = spark.read
        .format("csv")
        .option("sep", "|")
        .schema("line STRING")
        .load("/data/words")

      // Explode each comma-separated line into one row per word,
      // then group by word and count occurrences.
      val wordCountDF: DataFrame = linesDF
        .select(explode(split($"line", ",")) as "word")
        .groupBy($"word")
        .agg(count($"word") as "count")

      // Persist the counts as tab-separated CSV, replacing prior output.
      wordCountDF.write
        .format("csv")
        .option("sep", "\t")
        .mode(SaveMode.Overwrite)
        .save("/data/sql_wordcount")
    } finally {
      // Always release the driver/executor resources, even if the job fails.
      spark.stop()
    }

    /**
     * Submit command. Note: "--master yarn-client" is deprecated (removed in
     * Spark 3.x); use the explicit deploy-mode form instead:
     *
     * spark-submit --master yarn --deploy-mode client \
     *   --class com.shujia.spark.sql.Demo8Submit \
     *   --conf spark.sql.shuffle.partitions=1 \
     *   spark-1.0-SNAPSHOT.jar
     */
  }

}
